// `define TEST_FINAL
//`define TEST_FIRST
// Decodes one 32-bit divider operand into a shortreal according to its 2-bit
// format code:
//   2'b00 -> bits interpreted as an unsigned integer
//   2'b01 -> bits interpreted as a two's-complement signed integer
//   2'b1x -> bits interpreted as a single-precision bit pattern
// casex is kept on purpose: X/Z bits in fmt act as wildcards, so an unknown
// format bit 1 falls into the integer branches exactly as in the original code.
function automatic shortreal ifdiv_operand_to_shortreal;
    input [31:0] bits;
    input [1:0]  fmt;
    begin
        casex (fmt)
            2'b00: ifdiv_operand_to_shortreal = real'({1'b0, bits});
            2'b01: ifdiv_operand_to_shortreal = real'($signed(bits));
            2'b1x: ifdiv_operand_to_shortreal = $bitstoshortreal(bits);
        endcase
    end
endfunction

// Reference model for an integer/float divide: converts both operands to
// shortreal as selected by type_a / type_b, divides them, and returns the
// quotient as a 32-bit single-precision bit pattern.
//
//   a, b            : raw 32-bit operands
//   type_a, type_b  : operand formats (see ifdiv_operand_to_shortreal)
//   type_q          : accepted for call compatibility but not used; the result
//                     is always returned via $shortrealtobits
function [31:0] ifdiv_test;
    input  [31:0]  a;
    input  [31:0]  b;
    input  [1:0]   type_a;
    input  [1:0]   type_b;
    input  [1:0]   type_q;

    shortreal  a_int;
    shortreal  b_int;
    shortreal  q_int;

    begin
        a_int = ifdiv_operand_to_shortreal(a, type_a);
        b_int = ifdiv_operand_to_shortreal(b, type_b);

        q_int = a_int / b_int;

        ifdiv_test = $shortrealtobits(q_int);
    end
endfunction



// Simulation-only checker for a texture-shading unit: it drives texture
// coordinates and transform parameters from randomly filled buffers, serves
// texel bytes over a bus port and a 128-bit RAM port, and compares the returned
// texels against a reference image computed into data[].
module pixel_shader_check(
input clk,
input rst_n,
// Texture coordinate pair, read from st_buffer.
output [31:0] s,
output [31:0] t,
// Transform parameters and wrap modes, loaded from parameter_buffer.
output reg [31:0] matrix_A,
output reg [31:0] matrix_B,
output reg [31:0] matrix_C,
output reg [31:0] matrix_D,
output reg [31:0] matrix_E,
output reg [31:0] matrix_F,
output reg [31:0] matrix_G,
output reg [31:0] matrix_H,
output reg fragment_valid,
// Texture descriptor; all three are tied to constants below.
output [31:0] tex_addr,
output [31:0] tex_width,
output [31:0] tex_height,
output reg [1:0]  wrap_mode_s,
output reg [1:0]  wrap_mode_t,
// Back-pressure: fragment_valid is forced low while this is high.
input fragment_tex_shading_busy,

// Shaded texel returned by the unit under test; compared against data[].
input [7:0] r_tex,
input [7:0] g_tex,
input [7:0] b_tex,
input [7:0] a_tex,
input fragment_tex_valid,
// Bus read port served from tex_buffer. bus_bl is not referenced in this module.
input     [31:0]  bus_addr ,
input             bus_cmd  ,
input     [5:0]   bus_bl   ,
output reg     [31:0]  bus_dout ,
output reg             bus_den ,
output reg l2_cache_busy,
// 128-bit RAM port (chip select and write enable are active low).
input [19:0]  ram_addr,
input         ram_wrn,
input         ram_csn,
input [127:0] ram_wdata,
output reg [127:0] ram_rdata   
        
        );

reg [7:0] tex_buffer [2**21-1:0]; // 2**21 bytes = 2 MiB texture byte buffer
reg [63:0] st_buffer[1023:0];     // 1024 {s,t} coordinate pairs

reg [259:0] parameter_buffer[63:0]; // 64 sets of {A..H (8 x 32 bit), s_wrap, t_wrap}

reg [31:0] data[65535:0];         // expected texels, indexed param_set*1024 + st_index
reg [63:0] sfu_rom[511:0];        // loaded from rom64v512.mif; not read elsewhere in this module

assign tex_addr = 0;
assign tex_width = 100;
assign tex_height = 200;



// Load the SFU ROM image (binary text format) at time 0.
initial begin
   $readmemb("rom64v512.mif",sfu_rom);
end 

// Loop indices shared by the time-0 initial blocks of this module.
integer i;
integer j;
      
// Fill every coordinate entry with two random 32-bit words ({s,t}).
initial
begin
for(i=0;i<1024;i=i+1)
   st_buffer[i] = {$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff)}; 
end

// Fill every parameter set with eight random 32-bit words; the low four bits
// (the two wrap-mode fields) are forced to zero, so only wrap mode 0 is generated.
initial
begin
for(i=0;i<64;i=i+1)
   parameter_buffer[i] = {$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff),$urandom_range(32'h0,32'hffffffff),4'b0};//$urandom_range(2'h0,2'h1),$urandom_range(2'h0,2'h1)}; 
end  

// ---- Work variables of the reference model (written by the big initial block) ----

// Transformed coordinates: rounded integer form, then the 28-bit wrapped form.
logic [31:0] s_trans;
logic [31:0] t_trans;
logic [27:0] s_final;
logic [27:0] t_final;
// Current parameter set unpacked from parameter_buffer.
logic [31:0] matrix_calA;
logic [31:0] matrix_calB; 
logic [31:0] matrix_calC; 
logic [31:0] matrix_calD; 
logic [31:0] matrix_calE; 
logic [31:0] matrix_calF; 
logic [31:0] matrix_calG; 
logic [31:0] matrix_calH; 
logic [1:0] s_wrap;
logic [1:0] t_wrap;

// Current input coordinate pair and the scaled texel coordinates.
logic [31:0] s_cal;
logic [31:0] t_cal;
logic [31:0] u;
logic [31:0] v;






// Loop counters of the reference model (coordinate index, parameter-set index).
integer st_counter;
integer param_counter;

// Texel addresses, weights and channel values of the four sampled texels.
logic [31:0] addr1;
logic [31:0] addr2;
logic [31:0] addr3;
logic [31:0] addr4;
logic [15:0] param1_temp;
logic [15:0] param2_temp; 
logic [15:0] param3_temp; 
logic [15:0] param4_temp; 
logic [7:0] param1;
logic [7:0] param2; 
logic [7:0] param3; 
logic [7:0] param4; 
logic [7:0] r1;
logic [7:0] r2;
logic [7:0] r3;
logic [7:0] r4;
logic [7:0] g1;
logic [7:0] g2;
logic [7:0] g3;
logic [7:0] g4; 
logic [7:0] b1;
logic [7:0] b2;
logic [7:0] b3;
logic [7:0] b4; 
logic [7:0] a1;
logic [7:0] a2;
logic [7:0] a3;
logic [7:0] a4; 

// Weighted channel sums; only bits [7:0] are stored into data[].
logic [15:0] r_final;
logic [15:0] g_final; 
logic [15:0] b_final; 
logic [15:0] a_final;

// 65-bit accumulators for the transformed s, t and w terms.
logic [64:0] s_temp;
logic [64:0] t_temp;
logic [64:0] w_temp_64;

// NOTE(review): from w_temp down to w_rcp, only sign_w is written (to 0) in
// this file; the rest are not referenced here. They look like leftovers of a
// fixed-point reciprocal model -- confirm before removing.
logic [31:0] w_temp;
logic sign_w;
logic [63:0] sfu_data;

logic [31:0] leading_zero;

logic [63:0] w_cal0;

logic [31:0] w_cal1;
logic [31:0] w_cal2; 
logic [31:0] shift;
logic left_right;
logic [16:0] LSB;
logic [6:0] MSB;
logic [11:0] L_M;
logic [21:0] L_M_SQ_FULL;
logic [11:0] L_M_SQ;
logic [28:0] C0;
logic [17:0] C1;
logic [11:0] C2;
logic [34:0] C1_X;
logic [23:0] C2_X;
logic [22:0] C1_X_q;
logic [16:0] C2_X_q;

logic [28:0] result_adder;
logic [29:0] result_temp;
logic [31:0] result;
logic [31:0] w_rcp;
// Signed 32x32 partial products of the coordinate transform.
logic [63:0] ax;
logic [63:0] dy;
logic [63:0] bx;
logic [63:0] ey; 
logic [63:0] cx;
logic [63:0] fy; 

// Not referenced elsewhere in this file.
logic [31:0] w_64_clz;
logic [64:0] w_64_temp;
// Floating-point quotients s/w and t/w.
shortreal s_trans_temp;
shortreal t_trans_temp;

// Floating-point copies of the accumulators, plus debug views of them.
shortreal s_float;
shortreal t_float;
shortreal w_float;
logic [31:0] s_float_bit;
logic [31:0] t_float_bit;
logic [31:0] w_float_bit;

logic [31:0] s_trans_temp_bit;
logic [31:0] t_trans_temp_bit; 
// Wrapped coordinate multiplied by the texture width / height.
logic [63:0] sxw;
logic [63:0] txh;
// Debug net: integer quotient of the s and w accumulators (not used by the checks).
wire [63:0] test;
assign test = s_temp/w_temp_64;





// 128-bit wide mirror of the texture bytes, one word per 16 bytes.
logic [127:0] ram0 [2**20-1:0];
logic [7:0]   rd_tmp;      // random byte generated for the current index
logic [127:0] ram_init;    // 16-byte word being assembled, newest byte on top

// Fill tex_buffer with random bytes and pack every 16 consecutive bytes into
// one ram0 word (byte 0 of the group ends up in bits [7:0]).
initial
begin : fill_texture_memories
    j = 0;
    i = 0;
    while (i < 2**21)
    begin
        rd_tmp        = $urandom_range(0,255);
        tex_buffer[i] = rd_tmp;

        // Shift the new byte in from the top; after 16 bytes the word is complete.
        ram_init = {rd_tmp, ram_init[127:8]};
        if ((i % 16) == 15)
            ram0[i/16] = ram_init;

        i = i + 1;
    end
end

// RAM write port: store ram_wdata into ram0 on the rising edge when the chip
// select and write strobe are both low.
always @(posedge clk)
begin
    if (!ram_csn)
    begin
        if (!ram_wrn)
            ram0[ram_addr] <= ram_wdata;
    end
end

// RAM read port, updated on the falling edge. A write echoes ram_wdata; a read
// returns the 16 bytes of tex_buffer starting at 16*ram_addr[16:0] (lowest
// address in bits [7:0]). Reads come from tex_buffer, not from ram0, so data
// written through this port is not read back.
always @(negedge clk)
begin
    if (!ram_csn)
    begin
        if (!ram_wrn)
            ram_rdata <= ram_wdata;
        else if (ram_wrn)
        begin
            for (int lane = 0; lane < 16; lane = lane + 1)
                ram_rdata[8*lane +: 8] <= tex_buffer[16*ram_addr[16:0] + lane];
        end
    end
end






// Applies a 2-bit wrap mode to a transformed texture coordinate and returns
// the 28-bit result.
//   mode 1       : clamp -- negative values (bit 31 set) give 0, values above
//                  2**28 give 28'hfffffff, everything else passes bits [27:0].
//   other modes  : bits [27:0] are passed through unchanged.
// NOTE(review): in the non-clamp modes the "else" (negate) arm is only taken for
// coord == 0 because coord is unsigned, so negative coordinates are NOT mirrored.
// This is left as in the original because all generated stimulus uses mode 0 and
// the intended behaviour cannot be determined from this file -- confirm.
function automatic logic [27:0] wrap_tex_coord;
    input logic [31:0] coord;
    input logic [1:0]  mode;
    begin
        if (mode == 2'd1)
        begin
            // The sign must be tested explicitly: an unsigned "< 0" is never true.
            if ($signed(coord) < 0)
                wrap_tex_coord = 28'h0;
            else if (coord > 32'h10000000)
                wrap_tex_coord = 28'hfffffff;
            else
                wrap_tex_coord = coord[27:0];
        end
        else
        begin
            if (coord > 0)
                wrap_tex_coord = coord[27:0];
            else
                wrap_tex_coord = ~coord[27:0] + 1;
        end
    end
endfunction

// Reference model: for each of the 64 parameter sets and each of the 1024
// coordinate pairs, transform (s,t), divide by w, apply the wrap mode, scale to
// texel space and fetch the expected texel from tex_buffer. The result is
// stored as {a,r,g,b} in data[param_set*1024 + st_index].
initial
begin
    // Defer to the end of time step 0 so the other initial blocks have filled
    // parameter_buffer / st_buffer / tex_buffer and the continuous assigns of
    // tex_width / tex_height have propagated before they are read here.
    #0;

    sign_w = 0;
    for(param_counter=0;param_counter<64;param_counter=param_counter+1)
    begin
        `ifdef TEST_FIRST
        {matrix_calA,matrix_calB,matrix_calC,matrix_calD,matrix_calE,matrix_calF,matrix_calG,matrix_calH,s_wrap,t_wrap} = parameter_buffer[10];
        `else
        {matrix_calA,matrix_calB,matrix_calC,matrix_calD,matrix_calE,matrix_calF,matrix_calG,matrix_calH,s_wrap,t_wrap} = parameter_buffer[param_counter];
        `endif

        for(st_counter=0;st_counter<1024;st_counter=st_counter+1)
        begin

            `ifdef TEST_FIRST
            {s_cal,t_cal} = st_buffer[222];
            `else
            {s_cal,t_cal} = st_buffer[st_counter];
            `endif

            // Signed 32x32 products of the coordinates with the matrix terms.
            ax = $signed(s_cal) * $signed(matrix_calA);
            bx = $signed(s_cal) * $signed(matrix_calB);
            cx = $signed(s_cal) * $signed(matrix_calC);
            dy = $signed(matrix_calD) * $signed(t_cal);
            ey = $signed(matrix_calE) * $signed(t_cal);
            fy = $signed(matrix_calF) * $signed(t_cal);

            // s' = A*s + D*t + (G << 16), t' = B*s + E*t + (H << 16),
            // w  = C*s + F*t + 2**32.
            s_temp    = ($signed(ax) + $signed(dy) + $signed({matrix_calG,16'h0000}));
            t_temp    = ($signed(bx) + $signed(ey) + $signed({matrix_calH,16'h0000}));
            w_temp_64 = ($signed(cx) + $signed(fy) + $signed(34'h100000000));

            // Floating-point copies: s and t are divided by 16, w by 2**32.
            s_float = real'($signed(s_temp));
            t_float = real'($signed(t_temp));
            s_float = s_float/16;
            t_float = t_float/16;
            w_float = real'($signed(w_temp_64));
            w_float = w_float/65536;
            w_float = w_float/65536;

            // Debug views: raw single-precision bit patterns (same convention
            // as s_trans_temp_bit / t_trans_temp_bit below).
            s_float_bit = $shortrealtobits(s_float);
            t_float_bit = $shortrealtobits(t_float);
            w_float_bit = $shortrealtobits(w_float);

            s_trans_temp = s_float/w_float;
            t_trans_temp = t_float/w_float;

            s_trans_temp_bit = $shortrealtobits(s_trans_temp);
            t_trans_temp_bit = $shortrealtobits(t_trans_temp);

            // Real-to-integer conversion after subtracting a 0.375 offset.
            s_trans = (signed' (s_trans_temp - 0.375));
            t_trans = (signed' (t_trans_temp - 0.375));

            s_final = wrap_tex_coord(s_trans, s_wrap);
            t_final = wrap_tex_coord(t_trans, t_wrap);

            // Scale to texel space; after the shift, bits [7:0] of u/v are
            // used as the fractional weight and bits [23:8] as the texel index.
            sxw = tex_width * s_final;
            txh = tex_height * t_final;
            u = sxw >> 20;
            v = txh >> 20;

            if(u[7:0] != 0 || v[7:0] != 0)
            begin
                // Four neighbouring texels, row pitch tex_width.
                addr1 = u[23:8] + v[23:8] * tex_width;
                addr2 = (u[23:8]+1) + v[23:8] * tex_width;
                addr3 = u[23:8] + (v[23:8]+1) * tex_width;
                addr4 = u[23:8]+1 + (v[23:8]+1) * tex_width;

                // Bilinear weights: 16-bit products rounded to 8 bits by
                // adding bit 7 after the shift.
                param1_temp = ((9'h100-u[7:0]) * (9'h100 - v[7:0]));
                param2_temp = ((u[7:0]) * (9'h100 - v[7:0]));
                param3_temp = ((9'h100-u[7:0]) * (v[7:0]));
                param4_temp = ((u[7:0]) * (v[7:0]));
                param1 = (param1_temp >> 8) + param1_temp[7];
                param2 = (param2_temp >> 8) + param2_temp[7];
                param3 = (param3_temp >> 8) + param3_temp[7];
                param4 = (param4_temp >> 8) + param4_temp[7];

                // Four bytes per texel, byte order b, g, r, a from offset 0.
                r1 = tex_buffer[addr1[14:0]*4+2];
                g1 = tex_buffer[addr1[14:0]*4+1];
                b1 = tex_buffer[addr1[14:0]*4+0];
                a1 = tex_buffer[addr1[14:0]*4+3];
                r2 = tex_buffer[addr2[14:0]*4+2];
                g2 = tex_buffer[addr2[14:0]*4+1];
                b2 = tex_buffer[addr2[14:0]*4+0];
                a2 = tex_buffer[addr2[14:0]*4+3];
                r3 = tex_buffer[addr3[14:0]*4+2];
                g3 = tex_buffer[addr3[14:0]*4+1];
                b3 = tex_buffer[addr3[14:0]*4+0];
                a3 = tex_buffer[addr3[14:0]*4+3];
                r4 = tex_buffer[addr4[14:0]*4+2];
                g4 = tex_buffer[addr4[14:0]*4+1];
                b4 = tex_buffer[addr4[14:0]*4+0];
                a4 = tex_buffer[addr4[14:0]*4+3];

                r_final = (r1 * param1 + r2*param2 + r3 *param3 + r4 *param4)>>8;
                g_final = (g1 * param1 + g2*param2 + g3 *param3 + g4 *param4)>>8;
                b_final = (b1 * param1 + b2*param2 + b3 *param3 + b4 *param4)>>8;
                a_final = (a1 * param1 + a2*param2 + a3 *param3 + a4 *param4)>>8;
                data[param_counter*1024 + st_counter] = {a_final[7:0],r_final[7:0],g_final[7:0],b_final[7:0]};
            end
            else
            begin
                // Both fractions are zero: take the single texel unfiltered.
                // (This branch indexes with addr1[21:0], the filtered branch
                // with addr1[14:0]; kept as in the original.)
                addr1 = u[23:8] + v[23:8] * tex_width;
                r1 = tex_buffer[addr1[21:0]*4+2];
                g1 = tex_buffer[addr1[21:0]*4+1];
                b1 = tex_buffer[addr1[21:0]*4+0];
                a1 = tex_buffer[addr1[21:0]*4+3];
                data[param_counter*1024 + st_counter] = {a1,r1,g1,b1};
            end
        end
    end
end
// 32-byte line captured from tex_buffer when a bus command is seen.
reg  [255:0] bus_dout_reg;

// Debug view of the last entry of the expected-texel table.
wire [31:0]  test_data;
assign test_data = data[65535];

// One capture process per byte lane: on bus_cmd, lane k latches
// tex_buffer[bus_addr[15:0] + k] into bits [8k+7:8k].
genvar bus_count;
generate
    for (bus_count = 0; bus_count < 32; bus_count = bus_count + 1)
    begin
        always @(posedge clk)
        begin
            if (bus_cmd)
                bus_dout_reg[8*bus_count +: 8] <= tex_buffer[bus_addr[15:0] + bus_count];
        end
    end
endgenerate

reg [255:0] bus_dout_regshift;   // line being shifted out towards bus_dout
reg bus_cmd_ff;                  // bus_cmd delayed by one clock

// Delay bus_cmd by one cycle so bus_dout_reg has been captured before it is
// copied into the shift register.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        begin
           bus_cmd_ff <= 0;
        end
    else
        bus_cmd_ff <= bus_cmd;
end


// Load the captured line one cycle after the command, then shift it right by
// 64 bits on every cycle in which bus_den is high.
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        begin
           bus_dout_regshift <= 256'b0;
        end
    else if(bus_cmd_ff)
        begin
           bus_dout_regshift <= bus_dout_reg;
        end
    else if(bus_den)
          bus_dout_regshift <= bus_dout_regshift >> 64;
end

// Combinational output of the current beat. A blocking assignment is used
// because this is combinational logic, and the slice makes explicit what the
// original implicit truncation did: bus_dout is 32 bits wide, so only the low
// word of each 64-bit beat is visible.
// NOTE(review): the 64-bit shift step versus the 32-bit port drops bits [63:32]
// of every beat -- confirm whether the port or the shift amount is intended.
always@(*)
begin
    bus_dout = bus_dout_regshift[31:0];
end

reg [31:0] busy_count;   // cycles elapsed since the bus command
reg cmd_flag;            // high while a bus command is being served

// Last data-enable cycle of a response: bus_den high with the counter at 1238.
wire bus_burst_end = bus_den && (busy_count == 32'd1238);

// Command-in-flight flag: set by bus_cmd, cleared at the end of the burst.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        cmd_flag <= 1'b0;
    else if (bus_cmd)
        cmd_flag <= 1'b1;
    else if (bus_burst_end)
        cmd_flag <= 1'b0;
end

// Latency counter: counts while a command is pending and wraps to 0 after 1238.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        busy_count <= 32'd0;
    else if (busy_count == 32'd1238)
        busy_count <= 32'd0;
    else if (bus_cmd || cmd_flag)
        busy_count <= busy_count + 1;
end

// Data-enable window: raised when the counter reaches 1234 during a command,
// dropped again at the end of the burst.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        bus_den <= 1'b0;
    else if (cmd_flag && busy_count == 32'd1234)
        bus_den <= 1'b1;
    else if (bus_burst_end)
        bus_den <= 1'b0;
end



reg  [31:0] st_counter_1;    // index of the coordinate pair currently driven
reg  [31:0] tex_counter;     // number of completed 1024-fragment rounds
reg  [31:0] round_counter;   // cycles spent in the gap between two rounds
reg         round_flag_reg;
wire        round_flag;      // high from the last fragment of a round until the gap ends

// The 1024th coordinate pair of a round is being accepted this cycle.
wire last_st_sent = (st_counter_1 == 32'd1023) && fragment_valid;

// Coordinate index: advances on every valid fragment, wraps after 1023.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        st_counter_1 <= 32'd0;
    else if (fragment_valid)
    begin
        if (st_counter_1 == 32'd1023)
            st_counter_1 <= 32'd0;
        else
            st_counter_1 <= st_counter_1 + 1;
    end
end

// Registered part of the inter-round gap: set by the last fragment of a round,
// cleared once the gap counter reaches 3000.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        round_flag_reg <= 1'b0;
    else if (last_st_sent)
        round_flag_reg <= 1'b1;
    else if (round_counter == 32'd3000)
        round_flag_reg <= 1'b0;
end

// The combinational term makes the flag rise in the same cycle as the last fragment.
assign round_flag = round_flag_reg || last_st_sent;

// Gap counter: runs while round_flag is high, otherwise held at zero.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        round_counter <= 32'd0;
    else if (round_flag)
        round_counter <= round_counter + 1;
    else
        round_counter <= 32'd0;
end

// Round counter: incremented once per completed round.
always @(posedge clk or negedge rst_n)
begin
    if (~rst_n)
        tex_counter <= 32'd0;
    else if (last_st_sent)
        tex_counter <= tex_counter + 1;
end
    // Coordinate and parameter drivers. Three build variants:
    //   TEST_FINAL : fixed coordinate entry 1023 and parameter set 63
    //   TEST_FIRST : fixed coordinate entry 222 and parameter set 10
    //   default    : coordinates follow st_counter_1; the parameter set is
    //                reloaded from parameter_buffer[tex_counter[5:0]] when the
    //                inter-round gap counter reaches 3000
    // The clocked blocks use nonblocking assignments so that logic sampling
    // these module outputs on the same clock edge sees the old values
    // deterministically (blocking assignments here would race with it).
    `ifdef TEST_FINAL
    assign s = st_buffer[1023][63:32];
    assign t = st_buffer[1023][31:0];


     always@(posedge clk or negedge rst_n)
        begin
            // Parameters are only loaded during reset in this variant.
            if(~rst_n)
               {matrix_A,matrix_B,matrix_C,matrix_D,matrix_E,matrix_F,matrix_G,matrix_H,wrap_mode_s,wrap_mode_t} <= parameter_buffer[63];
           // else if(round_counter == 32'd3000)
           //  {matrix_A,matrix_B,matrix_C,matrix_D,matrix_E,matrix_F,matrix_G,matrix_H,wrap_mode_s,wrap_mode_t} <= parameter_buffer[tex_counter[5:0]];
        end
    `elsif TEST_FIRST
    assign s = st_buffer[222][63:32];
    assign t = st_buffer[222][31:0];


     always@(posedge clk or negedge rst_n)
        begin
            // Parameters are only loaded during reset in this variant.
            if(~rst_n)
               {matrix_A,matrix_B,matrix_C,matrix_D,matrix_E,matrix_F,matrix_G,matrix_H,wrap_mode_s,wrap_mode_t} <= parameter_buffer[10];
           // else if(round_counter == 32'd3000)
           //  {matrix_A,matrix_B,matrix_C,matrix_D,matrix_E,matrix_F,matrix_G,matrix_H,wrap_mode_s,wrap_mode_t} <= parameter_buffer[tex_counter[5:0]];
        end
    `else
    assign s = st_buffer[st_counter_1[9:0]][63:32];
    assign t = st_buffer[st_counter_1[9:0]][31:0];


     always@(posedge clk or negedge rst_n)
        begin
            if(~rst_n)
               {matrix_A,matrix_B,matrix_C,matrix_D,matrix_E,matrix_F,matrix_G,matrix_H,wrap_mode_s,wrap_mode_t} <= parameter_buffer[0];
            else if(round_counter == 32'd3000)
               {matrix_A,matrix_B,matrix_C,matrix_D,matrix_E,matrix_F,matrix_G,matrix_H,wrap_mode_s,wrap_mode_t} <= parameter_buffer[tex_counter[5:0]];
        end
    `endif

    // Registered request: high in the cycle after neither the inter-round gap
    // nor the busy input was active.
    reg fragment_valid_reg;
    always @(posedge clk or negedge rst_n)
    begin
        if (~rst_n)
            fragment_valid_reg <= 1'b0;
        else if (!(round_flag || fragment_tex_shading_busy))
            fragment_valid_reg <= 1'b1;
        else
            fragment_valid_reg <= 1'b0;
    end

    // The busy input also gates the output combinationally, so fragment_valid
    // drops in the same cycle that busy rises.
    always @(*)
    begin
        fragment_valid = fragment_valid_reg && !fragment_tex_shading_busy;
    end

    // Cache-busy indication: set by a bus command, cleared on the last
    // data-enable cycle (bus_den high with busy_count at 1238).
    always @(posedge clk or negedge rst_n)
    begin
        if (~rst_n)
            l2_cache_busy <= 1'b0;
        else if (bus_cmd)
            l2_cache_busy <= 1'b1;
        else if (bus_den && (busy_count == 32'd1238))
            l2_cache_busy <= 1'b0;
    end


    // Number of shaded texels received so far; indexes the expected-texel table.
    reg [31:0] fragment_in_counter;

    always @(posedge clk or negedge rst_n)
    begin
        if (~rst_n)
            fragment_in_counter <= 32'd0;
        else if (fragment_tex_valid)
            fragment_in_counter <= fragment_in_counter + 1;
    end

    // Expected {a,r,g,b} for the texel currently being returned.
    wire [31:0] data_true = data[fragment_in_counter];

    // Exact-match check: flags, for one cycle, a valid texel that differs from
    // the expected value.
    reg error_texel;

    always @(posedge clk or negedge rst_n)
    begin
        if (~rst_n)
            error_texel <= 1'b0;
        else if (fragment_tex_valid && {a_tex,r_tex,g_tex,b_tex} != data_true)
            error_texel <= 1'b1;
        else
            error_texel <= 1'b0;
    end

    // Per-channel absolute differences (received versus expected) and their sum.
    wire [7:0] a_true_minus_a_data = (data_true[31:24] < a_tex) ? (a_tex - data_true[31:24]) : (data_true[31:24] - a_tex);
    wire [7:0] r_true_minus_a_data = (data_true[23:16] < r_tex) ? (r_tex - data_true[23:16]) : (data_true[23:16] - r_tex);
    wire [7:0] g_true_minus_a_data = (data_true[15:8]  < g_tex) ? (g_tex - data_true[15:8])  : (data_true[15:8]  - g_tex);
    wire [7:0] b_true_minus_a_data = (data_true[7:0]   < b_tex) ? (b_tex - data_true[7:0])   : (data_true[7:0]   - b_tex);
    wire [9:0] abs_diff = a_true_minus_a_data + r_true_minus_a_data + g_true_minus_a_data + b_true_minus_a_data;

    // Tolerance checks: each flag pulses when a valid texel's summed channel
    // error exceeds its threshold (low > 5, mid > 10, high > 15).
    reg error_texel_with_lth;
    reg error_texel_with_mth;
    reg error_texel_with_hth;

    always @(posedge clk or negedge rst_n)
    begin
        if (~rst_n)
        begin
            error_texel_with_lth <= 1'b0;
            error_texel_with_mth <= 1'b0;
            error_texel_with_hth <= 1'b0;
        end
        else
        begin
            if (fragment_tex_valid && (abs_diff > 5))
                error_texel_with_lth <= 1'b1;
            else
                error_texel_with_lth <= 1'b0;

            if (fragment_tex_valid && (abs_diff > 10))
                error_texel_with_mth <= 1'b1;
            else
                error_texel_with_mth <= 1'b0;

            if (fragment_tex_valid && (abs_diff > 15))
                error_texel_with_hth <= 1'b1;
            else
                error_texel_with_hth <= 1'b0;
        end
    end
    

endmodule
